# "Gender Gap in Religiosity and Interfaith Marriage 
# Attitudes: Muslim Migrants in Germany"
# 10.1016/j.ijintrel.2024.102040
# Tolga Tezcan

# Replication code
# 08/09/2024

# 1. SETUP ----
## 1.1. Packages ----
library(tidyverse)   # for data manipulation
library(sjlabelled)  # for labeling variables
library(sjmisc)      # for recoding variables
library(psych)       # for calculating Cronbach's alpha
library(lme4)        # for mixed-effect logistic regression models
library(sjPlot)      # for creating tables of model outputs
library(performance) # for checking the performance of models
library(gridExtra)   # for arranging multiple plots in a grid layout
library(ggeffects)   # for computing marginal effects

## 1.2. Loading data ----
# The data is accessible by request in the GESIS Data Archive at
# https://doi.org/10.4232/1.12023, with the reference number ZA5244.
df <- haven::read_dta("ZA5244_v1-0-0.dta")

## 1.3. Creating data frames ----
### 1.3.1. Creating a copy data frame (df_muslim)----
df_muslim <- df

### 1.3.2. Creating a data frame for Muslim migrants----
df_muslim <- df_muslim %>%
  mutate(muslim = ifelse(V19 %in% c(1, 2), 1, 0)) %>%
  filter(muslim == 1)

# 2. RECODING VARIABLES----
## 2.1. Dependent variables----
### 2.1.1. Son approval----
df_muslim <- df_muslim %>%
  mutate(
    sonapprove = 0,
    sonapprove = case_when(
      V104 == 1 | V105 == 1 ~ 1,
      V104 == 2 & V105 == 2 ~ 0,
      V104 == 8 | V105 == 8 ~ NA_real_,
      TRUE ~ sonapprove
    )
  )
df_muslim$sonapprove <- set_label(df_muslim$sonapprove, "son approval")
df_muslim$sonapprove <- to_factor(df_muslim$sonapprove)

### 2.1.2. Daughter approval----
df_muslim <- df_muslim %>%
  mutate(
    daugapprove = 0,
    daugapprove = case_when(
      V106 == 1 | V107 == 1 ~ 1,
      V106 == 2 & V107 == 2 ~ 0,
      V106 == 8 | V107 == 8 ~ NA_real_,
      TRUE ~ daugapprove
    )
  )
df_muslim$daugapprove <- set_label(df_muslim$daugapprove, "daughter approval")
df_muslim$daugapprove <- to_factor(df_muslim$daugapprove)

## 2.2. Control variables----
### 2.2.1. Age----
df_muslim <- df_muslim %>%
  mutate(age = 2008 - V1) %>%
  mutate(age = ifelse(age %in% c(-7991, -6880), NA, age))
df_muslim$age <- set_label(df_muslim$age, "age")

### 2.2.2. Gender----
df_muslim$female <- recode_var(df_muslim, "V11",
                               "2=1 [female]; 1=0 [male]; NA=NA",
                               "female")
df_muslim$female <- to_factor(df_muslim$female)

### 2.2.3. Marital status----
df_muslim$married <- recode_var(df_muslim, "V10",
                                "2=1 [married]; 1,3,4,5=0 [nonmarried]; 9,NA=NA",
                                "married")
df_muslim$married <- to_factor(df_muslim$married)

### 2.2.4. Having a son----
df_muslim <- df_muslim %>%
  mutate(
    havingson = 0,
    havingson = if_else(V86p1 == 1 & V87p1 == 1, 1L, havingson, missing = 0)
  )

df_muslim$havingson <- set_label(df_muslim$havingson, "having son")
df_muslim$havingson <- to_factor(df_muslim$havingson)

### 2.2.5. Having a daughter----
df_muslim <- df_muslim %>%
  mutate(
    havingdaughter = 0,
    havingdaughter = if_else(V86p1 == 1 & V87p1 == 2, 1L, havingdaughter, missing = 0)
  )

df_muslim$havingdaughter <- set_label(df_muslim$havingdaughter, "having daughter")
df_muslim$havingdaughter <- to_factor(df_muslim$havingdaughter)

### 2.2.6. First generation----
df_muslim$firstgen <- recode_var(df_muslim, "gen",
                                 "1=1 [firstgen]; 2=0 [secondgen]; 3,4,NA=NA",
                                 "first generation")
df_muslim$firstgen <- to_factor(df_muslim$firstgen)

### 2.2.7. Education----
df_muslim <- df_muslim %>%
  mutate(
    educinger = case_when(
      V46 == 1 ~ 0L,
      V46 %in% c(2, 3, 4) ~ 1L,
      V46 == 9 | is.na(V46) ~ NA_integer_
    ),
    educ = NA_integer_,
    educ = case_when(
      V46 == 2 ~ 1L,
      V46 == 3 ~ 2L,
      V46 == 4 & V47 %in% c(1, 2, 3, 5) ~ 3L,
      V46 == 4 & V47 == 4 ~ 0L,
      TRUE ~ educ 
    ),
    educ = case_when(
      V56 == 1 ~ 2L,
      V56 %in% c(2, 3) ~ 3L,
      V56 == 4 ~ 0L,
      TRUE ~ educ
    )
  )

df_muslim$educ <- set_labels(df_muslim$educ, 
                             labels = c("student" = 1,
                                        "no degree" = 2,
                                        "secondary/elementary" = 3,
                                        "high school" = 0))
df_muslim$educ <- set_label(df_muslim$educ, "education")
df_muslim$educ <- to_factor(df_muslim$educ)

### 2.2.8. Employment----
df_muslim$employed <- recode_var(df_muslim, "V58",
                                 "1=1 [employed]; 2,3,4=0 [not employed]; 9,NA=NA",
                                 "employed")
df_muslim$employed <- to_factor(df_muslim$employed)

### 2.2.9. Attachment to the country of origin----
df_muslim$coforigin <- recode_var(df_muslim, "V16",
                                  "1=5 [very strong]; 2=4 [strong]; 3=3 [partly]; 4=2 [little]; 5=1 [not at all]; 8,9,NA=NA",
                                  "attachment to country of origin")
df_muslim$coforigin <- as.numeric(df_muslim$coforigin)

### 2.2.10. Attachment to Germany----
df_muslim$connected_germany <- recode_var(df_muslim, "V17",
                                          "1=5 [very strong]; 2=4 [strong]; 3=3 [partly]; 4=2 [little]; 5=1 [not at all]; 8,9,NA=NA",
                                          "attachment to germany")
df_muslim$connected_germany <- as.numeric(df_muslim$connected_germany)

### 2.2.11. Total contact----
contact_recode <- "1=6 [daily]; 2=5 [less often]; 3=4 [once a week]; 4=3 [svrl times a month]; 5=2 [less often]; 6=1 [not at all]; 9,NA=NA"

df_muslim$contactger_fam <- rec(df_muslim$V12_1, rec = contact_recode, append = FALSE)
df_muslim$contactger_wrk <- rec(df_muslim$V12_2, rec = contact_recode, append = FALSE)
df_muslim$contactger_neighb <- rec(df_muslim$V12_3, rec = contact_recode, append = FALSE)
df_muslim$contactger_friend <- rec(df_muslim$V12_4, rec = contact_recode, append = FALSE)

df_muslim <- df_muslim %>%
  mutate(
    contactger_fam = as.numeric(contactger_fam),
    contactger_wrk = as.numeric(contactger_wrk),
    contactger_neighb = as.numeric(contactger_neighb),
    contactger_friend = as.numeric(contactger_friend)
  )

df_muslim <- df_muslim %>%
  rowwise() %>% 
  mutate(totalcontact = mean(c(contactger_fam, contactger_wrk, contactger_neighb, contactger_friend)))

df_muslim$totalcontact <- set_label(df_muslim$totalcontact, "total contact")

indextotalcontact <- df_muslim %>% select(all_of(contact_vars))
print(alpha(indextotalcontact))

### 2.2.12. Religiosity----
#### 2.2.12.1. Subjective religiosity----
df_muslim$subjective_rel <- recode_var(df_muslim, "V30",
                                       "1=1 [not at all]; 2=2 [2]; 3=3 [3]; 4=4 [very]; 9,NA=NA",
                                       "subjective religiosity")
df_muslim$subjective_rel <- as.numeric(df_muslim$subjective_rel)

#### 2.2.12.2. Individual religiosity----
df_muslim$individual_rel <- recode_var(df_muslim, "V34",
                                       "1=7 [daily]; 2=6 [6]; 3=5 [5]; 4=4 [4]; 5=3 [3]; 6=2 [2]; 7=1 [never]; 9,NA=NA",
                                       "individual religiosity")
df_muslim$individual_rel <- as.numeric(df_muslim$individual_rel)

#### 2.2.12.3. Communal religiosity----
df_muslim$communal_rel <- recode_var(df_muslim, "V35",
                                     "1=7 [daily]; 2=6 [6]; 3=5 [5]; 4=4 [4]; 5=3 [3]; 6=2 [2]; 7=1 [never]; 9,NA=NA",
                                     "communal religiosity")
df_muslim$communal_rel <- as.numeric(df_muslim$communal_rel)

### 2.2.13. Country of origin----
df_muslim$country <- NA
df_muslim$country[df_muslim$v2reg == 2 | (df_muslim$v5reg == 2 & df_muslim$v6reg == 2) | df_muslim$v7_102 == 1] <- 0
df_muslim$country[df_muslim$v2reg == 1 | (df_muslim$v5reg == 1 & df_muslim$v6reg == 1) | df_muslim$v7_101 == 1] <- 1
df_muslim$country[df_muslim$v2reg == 3 | (df_muslim$v5reg == 3 & df_muslim$v6reg == 3) | df_muslim$v7_103 == 1] <- 2
df_muslim$country[df_muslim$v2reg == 4 | (df_muslim$v5reg == 4 & df_muslim$v6reg == 4) | df_muslim$v7_104 == 1] <- 3
df_muslim$country[df_muslim$v2reg == 5 | (df_muslim$v5reg == 5 & df_muslim$v6reg == 5) | df_muslim$v7_105 == 1] <- 4
df_muslim$country[df_muslim$v2reg == 6 | (df_muslim$v5reg == 6 & df_muslim$v6reg == 6) | df_muslim$v7_106 == 1] <- 5
df_muslim$country[df_muslim$v2reg == 7 | (df_muslim$v5reg == 7 & df_muslim$v6reg == 7) | df_muslim$v7_107 == 1] <- 6
df_muslim$country[df_muslim$v2reg == 8 | (df_muslim$v5reg == 8 & df_muslim$v6reg == 8) | df_muslim$v7_108 == 1] <- 7

# 3. MODELS----
## 3.1. Mixed-effects logistic regression models----
glmfit1 <- glmer(sonapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country), 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit2 <- glmer(sonapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit3 <- glmer(sonapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact + 
                   subjective_rel + individual_rel + communal_rel, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit4 <- glmer(sonapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact + 
                   subjective_rel*female+ individual_rel*female + communal_rel*female, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit5 <- glmer(daugapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country), 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit6 <- glmer(daugapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit7 <- glmer(daugapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact + 
                   subjective_rel + individual_rel + communal_rel, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))
glmfit8 <- glmer(daugapprove ~ age + female + married + havingson + havingdaughter + 
                   firstgen + educ + employed + (1 | country) + 
                   coforigin + connected_germany + totalcontact + 
                   subjective_rel*female + individual_rel*female + communal_rel*female, 
                 df_muslim, family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 100000)))

tab_model(glmfit1,glmfit2,glmfit3,glmfit4,glmfit5,glmfit6,glmfit7,glmfit8, collapse.se = TRUE, 
          show.p = TRUE, p.style = "numeric_stars", p.threshold = c(0.05, 0.01, 0.001), show.loglik = TRUE, 
          show.ci = FALSE, show.intercept = TRUE, 
          show.aic = TRUE, show.dev = FALSE, show.se = FALSE, show.obs = TRUE)

## 3.2. Checking the performance of models----
check_collinearity(glmfit1)
check_collinearity(glmfit2)
check_collinearity(glmfit3)
check_collinearity(glmfit5)
check_collinearity(glmfit6)
check_collinearity(glmfit7)
check_model(glmfit4)
check_model(glmfit8)

## 3.3. Calculating ICC and BIC----
var_random <- as.numeric(VarCorr(glmfit8)$country[1])
var_residual <- pi^2 / 3
icc <- var_random / (var_random + var_residual)
print(icc)
model_bic <- BIC(glmfit8)
print(model_bic)

## 3.4. Average marginal effects (AME)----
options(digits=2, scipen=15)
summary(glmfit1)
summary(glmfit2)
summary(glmfit3)
summary(glmfit4)
summary(glmfit5)
summary(glmfit6)
summary(glmfit7)
summary(glmfit8)

# 4. PREDICTED PROBABILITIES----
## 4.1. Setting theme----
set_theme(
  axis.title.size = 1.5,
  geom.linetype =1,
  geom.outline.color = "black", 
  geom.outline.size = 1, 
  geom.label.size = 4,
  geom.label.color = "black",
  title.color = "black", 
  title.size = 3, 
  axis.angle.x = 0, 
  axis.textcolor = "black",
  base = theme_bw()
)

## 4.2. Plots----
pp1 <- plot_model(glmfit4, type = "pred",line.size=2, axis.lim=c(.65, 1), 
                  grid.breaks = 5, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  auto.label = FALSE,
                  axis.title = "Approval for sons",
                  axis.labels ="(A) Subjective religiosity", terms = c("subjective_rel", "female"))

pp2 <- plot_model(glmfit4, type = "pred",line.size=2, axis.lim=c(.65, 1), 
                  grid.breaks = 10, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  axis.title = "Approval for sons", 
                  axis.labels ="(C) Individual religiosity", terms = c("individual_rel", "female"))

pp3 <- plot_model(glmfit4, type = "pred",line.size=2, axis.lim=c(.50, 1), 
                  grid.breaks = 10, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  axis.title = "Approval for sons", 
                  axis.labels ="(E) Communal religiosity", terms = c("communal_rel", "female"))

pp4 <- plot_model(glmfit8, type = "pred",line.size=2, axis.lim=c(.65, 1), 
                  grid.breaks = 10, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  axis.title = "Approval for daughters", 
                  axis.labels ="(B) Subjective religiosity", terms = c("subjective_rel", "female"))

pp5 <- plot_model(glmfit8, type = "pred",line.size=2, axis.lim=c(.65, 1), 
                  grid.breaks = 10, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  axis.title = "Approval for daughters",
                  axis.labels ="(D) Individual religiosity", terms = c("individual_rel", "female"))

pp6 <- plot_model(glmfit8, type = "pred",line.size=2, axis.lim=c(.50, 1), 
                  grid.breaks = 10, colors ="bw", title = "", show.legend = FALSE, ci.lvl = NA,
                  axis.title = "Approval for daughters",
                  axis.labels ="(F) Communal religiosity", terms = c("communal_rel", "female"))

grid.arrange(pp1, pp4, pp2, pp5, pp3, pp6, ncol = 2, heights = c(1, 1, 1))

## 4.3. Marginal effects----
ggpredict(glmfit4, c("subjective_rel", "female"))
ggpredict(glmfit4, c("individual_rel", "female"))
ggpredict(glmfit4, c("communal_rel", "female"))
ggpredict(glmfit8, c("subjective_rel", "female"))
ggpredict(glmfit8, c("individual_rel", "female"))
ggpredict(glmfit8, c("communal_rel", "female"))